#!/bin/bash
# Master run file for "Extractive versus Generative Language Models for Political Conflict Text Classification"
#
# This script fulfills the Political Analysis replication requirements.
#
# USAGE:
#   bash run.sh         (Default) Runs fast verification, generating all figures and tables
#                       from the pre-computed data files.
#
#   bash run.sh full    (Optional) Runs the entire data recreation pipeline first. This is
#                       computationally expensive and will overwrite existing model outputs.

# Abort the run as soon as any command exits with a non-zero status.
set -o errexit

# From here on, send both stdout and stderr through tee: everything the script
# and its child processes print is shown on the terminal and also written to
# run.log in the current directory. tee is invoked without -a, so run.log is
# overwritten on every run. Only output is captured; commands themselves are
# not traced.
exec 1> >(tee run.log) 2>&1

# Record when the run started and which OS / Python / R produced it.
# The command substitutions stay inline as printf arguments on purpose: a
# failing probe (e.g. R not on PATH) then yields an empty field instead of
# aborting the script under `set -e`.
banner_rule="--------------------------------------------------"
printf 'REPLICATION RUN STARTED AT: %s\n' "$(date)"
printf '%s\n' "$banner_rule"
printf '%s\n' "SYSTEM INFORMATION:"
printf 'OS: %s\n' "$(uname -a)"
printf 'Python Version: %s\n' "$(python3 --version)"
printf 'R Version: %s\n' "$(R --version | grep 'R version')"
printf '%s\n' "$banner_rule"

# --- Dependency Checks ---
echo "Checking/installing Python dependencies from requirements.txt..."
# Run pip as a module of python3 so the packages are installed for the same
# interpreter that executes the Python scripts in this file; a bare `pip` on
# PATH may belong to a different Python installation.
python3 -m pip install -r requirements.txt

echo "Checking/installing R package dependencies..."
# Install only the packages that are missing, then re-check. install.packages()
# reports a failed installation as a warning, which would leave Rscript's exit
# status at 0; the explicit stop() turns a still-missing package into a
# non-zero exit so the run aborts here rather than later in the figure script.
# The repository is fetched over HTTPS.
Rscript \
    -e 'required <- c("jsonlite", "data.table", "ROCR", "zoo")' \
    -e 'missing_pkgs <- setdiff(required, rownames(installed.packages()))' \
    -e 'if (length(missing_pkgs)) install.packages(missing_pkgs, repos = "https://cran.rstudio.com/")' \
    -e 'still_missing <- setdiff(required, rownames(installed.packages()))' \
    -e 'if (length(still_missing)) stop("Failed to install R package(s): ", paste(still_missing, collapse = ", "))'
echo "R packages are up to date."


# --- Create output directories ---
# Both directories are created relative to the current working directory.
# -p makes the call succeed silently when a directory already exists.
printf '%s\n' "Creating output directories..."
output_dirs=(figures tables)
mkdir -p "${output_dirs[@]}"


# --- Full Data Recreation Pipeline (Optional) ---

#######################################
# Report whether a model is present in the local Ollama library.
# The comparison is an exact match against the first whitespace-separated
# column of `ollama list` (the NAME column, including the tag). A substring or
# word match is not sufficient: "gemma2:9b" would also be satisfied by
# "gemma2:9b-instruct-q4_0" or "someuser/gemma2:9b".
# awk reads the listing to the end, so `ollama list` is never cut off early.
# Arguments: $1 - full model name with tag, e.g. "llama3.1:latest"
# Returns:   0 if a row with exactly that name exists, 1 otherwise
#######################################
ollama_model_available() {
    local wanted=$1
    ollama list | awk -v wanted="$wanted" '$1 == wanted { found = 1 } END { exit !found }'
}

# Runs only when the first argument is exactly "full"; ${1:-} keeps the test
# valid when the script is started without arguments.
if [[ "${1:-}" == "full" ]]; then
    echo "[MODE] Starting FULL data recreation. This will be slow."

    # Check 1: See if the 'ollama' command exists at all.
    if ! command -v ollama &> /dev/null; then
        echo "------------------------------------------------------------"
        echo "ERROR: The 'ollama' command is not found on your system."
        echo "Ollama is required for the full data recreation."
        echo "Please install it from: https://ollama.com/download"
        echo "Then, ensure the Ollama application is running and try again."
        echo "------------------------------------------------------------"
        exit 1
    fi

    # Check 2: See if the Ollama application/server is actually running.
    # `ollama ps` is used purely as a probe; its output is discarded.
    if ! ollama ps >/dev/null 2>&1; then
        echo "Error: Ollama application is not running. Please start it and try again."
        exit 1
    fi

    # Check 3: See if all the necessary models have been pulled.
    echo "--- Checking for required Ollama models ---"
    required_ollama_models=(
        "gemma2:9b"
        "llama3.1:latest"
        "qwen2.5:14b"
    )

    for model in "${required_ollama_models[@]}"; do
        echo "Checking for model: $model"
        if ! ollama_model_available "$model"; then
            echo "------------------------------------------------------------"
            echo "ERROR: Required Ollama model '$model' is not available."
            echo "Please pull the model by running the following command:"
            echo ""
            echo "    ollama pull $model"
            echo ""
            echo "Then, re-run this script: bash run.sh full"
            echo "------------------------------------------------------------"
            exit 1
        fi
    done
    echo "All required Ollama models are available."

    # The four recreation steps run strictly in this order.
    echo "--- (1/4) Recreating multi-class event data..."
    python3 code/05_recreate_multiclass_data.py

    echo "--- (2/4) Recreating Binary Classification inference data..."
    python3 code/recreate_bc_inference.py

    echo "--- (3/4) Recreating NER data (ConfliBERT)..."
    python3 code/recreate_ner_step1_conflibert.py

    echo "--- (4/4) Recreating NER data (Ollama Models)..."
    python3 code/recreate_ner_step2_ollama.py

    echo "Full data recreation complete. Proceeding to verification."
fi


# --- Verification Pipeline (Default) ---
# This section is not guarded by any mode check, so it runs on every
# invocation: directly in the default mode, and after the recreation steps
# when "full" was requested.
printf '%s\n' "[MODE] Starting analysis and verification."

# Step 1: the R script produces the figures.
printf '%s\n' "(1/2) Generating Figures 1-4 using R..."
Rscript code/01_generate_figures.R

# Step 2: the Python script produces the tables.
printf '%s\n' "(2/2) Generating all manuscript tables using Python..."
python3 code/02_generate_tables.py


# Closing banner with the completion timestamp.
printf '%s\n' "--------------------------------------------------"
printf 'REPLICATION RUN FINISHED AT: %s\n' "$(date)"
printf '%s\n' "All outputs are in the /figures and /tables directories."